Univariable
# sample size: open and closed days ------------------------------------------
# `is_closed` is TRUE on days the restaurant was closed, so open days are the
# rows where it is FALSE. The other sections of this file agree: filtering on
# `!is_closed` / `is_closed == FALSE` leaves the 161 open-day observations.
# (The two labels were previously swapped.)
data.frame(
  obs_days    = nrow(df),
  open_days   = sum(!df$is_closed),
  closed_days = sum(df$is_closed)
)
## obs_days open_days closed_days
## 1 169 161 8
# basic summary: dependents ----------------------------------------------------
# Six-number summary (min, quartiles, mean, max) of every dependent variable
# over all observed days; one column per variable, one row per statistic.
with(
  df,
  data.frame(
    food_loss_waste   = c(summary(food_loss_kg + food_waste_kg)),
    food_loss         = c(summary(food_loss_kg)),
    food_waste_all    = c(summary(food_waste_kg)),
    food_waste_liquid = c(summary(liquid_waste_kg)),
    food_waste_solid  = c(summary(solid_waste_kg))
  )
)
## food_loss_waste food_loss food_waste_all food_waste_liquid
## Min. 0.000000 0.000000 0.000000 0.000000
## 1st Qu. 8.250000 6.600000 0.950000 0.550000
## Median 9.500000 7.300000 1.950000 1.400000
## Mean 9.543491 7.460355 2.083136 1.408876
## 3rd Qu. 11.050000 8.150000 2.900000 2.000000
## Max. 17.900000 13.800000 6.550000 4.500000
## food_waste_solid
## Min. 0.0000000
## 1st Qu. 0.3500000
## Median 0.6000000
## Mean 0.6742604
## 3rd Qu. 0.9000000
## Max. 2.9500000
# basic summary: dependents excluding closed days ------------------------------
# Same table as the all-days summary, restricted to rows where `is_closed`
# is not TRUE.
with(
  df[!df$is_closed, ],
  data.frame(
    food_loss_waste   = c(summary(food_loss_kg + food_waste_kg)),
    food_loss         = c(summary(food_loss_kg)),
    food_waste_all    = c(summary(food_waste_kg)),
    food_waste_liquid = c(summary(liquid_waste_kg)),
    food_waste_solid  = c(summary(solid_waste_kg))
  )
)
## food_loss_waste food_loss food_waste_all food_waste_liquid
## Min. 0.0000 0.000000 0.000000 0.000000
## 1st Qu. 8.4000 6.700000 1.100000 0.650000
## Median 9.6500 7.350000 2.100000 1.500000
## Mean 10.0177 7.831056 2.186646 1.478882
## 3rd Qu. 11.1500 8.400000 2.950000 2.050000
## Max. 17.9000 13.800000 6.550000 4.500000
## food_waste_solid
## Min. 0.000000
## 1st Qu. 0.350000
## Median 0.650000
## Mean 0.707764
## 3rd Qu. 0.950000
## Max. 2.950000
# summary of the dependent variables (columns 4-7 of df) ---------------------
# The stargazer table reports, per variable:
# 1. number of observations
# 2. Averages
# 3. standard deviations
# 4. Min values
# 5. Max values
# The table is printed as text and also written to deps1.txt.
# NOTE(review): this call keeps only rows with is_closed == FALSE and is
# identical to the "Excluding the restaurant closed" call further down apart
# from the output file name, so deps1.txt and deps2.txt receive the same table
# (N = 161). If this first table was meant to cover all observed days, the
# subset() should be dropped here -- confirm intent.
# NOTE(review): df[4:7] selects columns by position; the rendered table shows
# them as food_loss_kg, food_waste_kg, solid_waste_kg, liquid_waste_kg.
stargazer(subset(df[4:7], df$is_closed == FALSE), flip=TRUE,
type = "text",digits=2, out="deps1.txt")
##
## ===================================================================
## Statistic food_loss_kg food_waste_kg solid_waste_kg liquid_waste_kg
## -------------------------------------------------------------------
## N 161 161 161 161
## Mean 7.83 2.19 0.71 1.48
## St. Dev. 2.17 1.40 0.50 1.00
## Min 0.00 0.00 0.00 0.00
## Max 13.80 6.55 2.95 4.50
## -------------------------------------------------------------------
# Excluding the days the restaurant was closed ---------------
# Descriptive statistics for columns 4-7 over open days only; printed as text
# and written to deps2.txt.
stargazer(
  df[df$is_closed %in% FALSE, 4:7],
  flip = TRUE,
  type = "text",
  digits = 2,
  out = "deps2.txt"
)
##
## ===================================================================
## Statistic food_loss_kg food_waste_kg solid_waste_kg liquid_waste_kg
## -------------------------------------------------------------------
## N 161 161 161 161
## Mean 7.83 2.19 0.71 1.48
## St. Dev. 2.17 1.40 0.50 1.00
## Min 0.00 0.00 0.00 0.00
## Max 13.80 6.55 2.95 4.50
## -------------------------------------------------------------------
Histograms
# bw <- 2 * IQR(df$food_loss_kg) / length(df$food_loss_kg)^(1/3)
# binwidth = bw
# Histogram on food loss + food waste ------------------------------------------
# Density-scaled histogram of the daily total (loss + waste) on open days,
# with a kernel density curve drawn over the bars.
hist_loss_waste <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = food_loss_kg + food_waste_kg)
  ) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    bins = 30, colour = 1, fill = "white"
  ) +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Food Loss and Food Waste - Histogram")
hist_loss_waste

# Histogram on food loss----------------------------------------------------
# Density-scaled histogram of daily food loss on open days, with a kernel
# density curve drawn over the bars.
hist_loss <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = food_loss_kg)
  ) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    bins = 30, colour = 1, fill = "white"
  ) +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Food Loss - Histogram")
hist_loss

# Histogram of food waste ----------------------------------------------------
# Density-scaled histogram of daily food waste on open days, with a kernel
# density curve drawn over the bars.
hist_food_waste <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = food_waste_kg)
  ) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    bins = 30, colour = 1, fill = "white"
  ) +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Food Waste - Histogram")
hist_food_waste

# Histogram of solid waste ----------------------------------------------------
# Density-scaled histogram of daily solid food waste on open days, with a
# kernel density curve drawn over the bars.
hist_solid_waste <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = solid_waste_kg)
  ) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    bins = 30, colour = 1, fill = "white"
  ) +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Solid Food Waste - Histogram")
hist_solid_waste

# Histogram of liquid waste ----------------------------------------------------
# Density-scaled histogram of daily liquid food waste on open days, with a
# kernel density curve drawn over the bars.
hist_liquid_waste <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = liquid_waste_kg)
  ) +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    bins = 30, colour = 1, fill = "white"
  ) +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Liquid Food Waste - Histogram")
hist_liquid_waste

# Lay the five histograms out on a single page.
grid.arrange(
  grobs = list(
    hist_loss_waste,
    hist_loss,
    hist_food_waste,
    hist_solid_waste,
    hist_liquid_waste
  )
)

Time Series Plots
# Calendar helper columns used by the monthly plots and boxplots.
# The month label is built from the numeric month ("%m") and base R's
# `month.abb`, not from "%b": "%b" is locale-dependent, and a non-English
# locale would yield abbreviations that match none of the English factor
# levels below, turning the whole column into NA.
df$month_name <- month.abb[as.integer(format(as.Date(df$date), format = "%m"))]
# head(df$month); tail(df$month)
# Levels follow the observation period (Sep -> Mar) so facets and boxplots
# appear in chronological rather than alphabetical order. Months outside this
# list become NA.
df$month_name <- factor(
  df$month_name,
  levels = c("Sep", "Oct", "Nov", "Dec", "Jan", "Feb", "Mar")
)
# Two-digit day of month ("01".."31"), used as the x axis of monthly facets.
df$day_name <- format(as.Date(df$date), format = "%d")
# Daily Plot on food loss + food waste ---------------------------------
# Line chart of the daily total (loss + waste) over every observed day.
# Points are shaped by `is_closed` using shapes 16 and 4 (presumably a filled
# dot for open days and a cross for closed days -- depends on level order).
daily_loss_waste <-
  ggplot(df, aes(x = as.Date(date), y = food_loss_kg + food_waste_kg)) +
  geom_line(aes(group = 1), color = "dark blue") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values = c(16, 4)) +
  labs(
    x = "Date",
    y = "Daily Food Loss and Waste (kg)",
    title = "Daily Food Loss and Waste Trend"
  ) +
  theme(legend.position = c(0.05, 0.15))
daily_loss_waste

# Daily Plot on food loss ------------------------------------------------
# Line chart of daily food loss over every observed day; points are shaped by
# `is_closed` (shapes 16 and 4).
daily_loss <-
  ggplot(df, aes(x = as.Date(date), y = food_loss_kg)) +
  geom_line(color = "blue") +
  geom_point(aes(shape = is_closed)) +
  scale_shape_manual(values = c(16, 4)) +
  scale_x_date(date_labels = "%b %d") +
  labs(
    x = "Date",
    y = "Daily Food Loss (kg)",
    title = "Daily Food Loss Trend"
  ) +
  theme(legend.position = c(0.9, 0.85))
daily_loss

# Daily Plot on food waste -----------------------------------------------
# Line chart of daily food waste over every observed day; points are shaped
# by `is_closed` (shapes 16 and 4).
daily_waste <-
  ggplot(df, aes(x = as.Date(date), y = food_waste_kg)) +
  geom_line(color = "black") +
  geom_point(aes(shape = is_closed)) +
  scale_shape_manual(values = c(16, 4)) +
  scale_x_date(date_labels = "%b %d") +
  labs(
    x = "Date",
    y = "Daily Food Waste (kg)",
    title = "Daily Food Waste Trend"
  ) +
  theme(legend.position = c(0.8, 0.85))
daily_waste

# Daily Plot on solid food waste -----------------------------------------
# Line chart of daily solid food waste over every observed day; points are
# shaped by `is_closed` (shapes 16 and 4).
daily_solid_waste <-
  ggplot(df, aes(x = as.Date(date), y = solid_waste_kg)) +
  geom_line(color = "dark orange") +
  geom_point(aes(shape = is_closed)) +
  scale_shape_manual(values = c(16, 4)) +
  scale_x_date(date_labels = "%b %d") +
  labs(
    x = "Date",
    y = "Daily Solid Food Waste (kg)",
    title = "Daily Solid Food Waste Trend"
  ) +
  theme(legend.position = c(0.8, 0.85))
daily_solid_waste

# Daily Plot on liquid food waste ----------------------------------------
# Line chart of daily liquid food waste over every observed day; points are
# shaped by `is_closed` (shapes 16 and 4).
# Fix: the y-axis label read "Daily Liquid ood Waste (kg)".
daily_liquid_waste <-
  ggplot(data = df, aes(x = as.Date(date), y = liquid_waste_kg)) +
  geom_line(color = "dark blue") +
  # NOTE(review): second line layer drawn over the first; the sibling daily
  # plots use a single line -- confirm the dashed overlay is intentional.
  geom_line(color = "blue", linetype = "dashed") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = c(0.8, 0.85)) +
  xlab("Date") + ylab("Daily Liquid Food Waste (kg)") +
  ggtitle("Daily Liquid Food Waste Trend")
daily_liquid_waste

# Lay the five daily trend plots out on a single page.
grid.arrange(
  grobs = list(
    daily_loss_waste,
    daily_loss,
    daily_waste,
    daily_solid_waste,
    daily_liquid_waste
  )
)

# Monthly Plot on food loss and food waste ---------------------------------
# One facet row per month, day of month on the x axis, daily total
# (loss + waste) on the y axis; points are shaped by `is_closed`.
monthly_loss_waste <-
  ggplot(
    df,
    aes(x = day_name, y = food_loss_kg + food_waste_kg, group = 1)
  ) +
  geom_line(color = "dark blue") +
  geom_point(aes(shape = is_closed)) +
  # geom_rect(data = df, aes(xmin = date, xmax = dplyr::lead(date),
  # ymin = -Inf, ymax = Inf,
  # fill = factor(!is_closed)), alpha = .3) +
  facet_grid(rows = vars(month_name)) +
  scale_shape_manual(values = c(16, 4)) +
  labs(
    x = "Date",
    y = "Monthly Food Loss and Waste (kg)",
    title = "Monthly Food Loss and Waste Trend"
  ) +
  theme(legend.position = "none")
monthly_loss_waste

# Monthly Plot on food loss ------------------------------------------------
# One facet row per month, day of month on the x axis, daily food loss on the
# y axis; points are shaped by `is_closed`.
monthly_loss <-
  ggplot(df, aes(x = day_name, y = food_loss_kg, group = 1)) +
  geom_line(color = "black") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(rows = vars(month_name)) +
  scale_shape_manual(values = c(16, 4)) +
  labs(
    x = "Date",
    y = "Monthly Food Loss (kg)",
    title = "Monthly Food Loss Trend"
  ) +
  theme(legend.position = "none")
monthly_loss

# Monthly Plot on food waste -----------------------------------------------
# One facet row per month, day of month on the x axis, daily food waste on
# the y axis; points are shaped by `is_closed`.
monthly_waste <-
  ggplot(df, aes(x = day_name, y = food_waste_kg, group = 1)) +
  geom_line(color = "black") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(rows = vars(month_name)) +
  scale_shape_manual(values = c(16, 4)) +
  labs(
    x = "Date",
    y = "Monthly Food Waste (kg)",
    title = "Monthly Food Waste Trend"
  ) +
  theme(legend.position = "none")
monthly_waste

# Monthly Plot on solid food waste -----------------------------------------
# One facet row per month, day of month on the x axis, daily solid food waste
# on the y axis; points are shaped by `is_closed`.
monthly_solid_waste <-
  ggplot(df, aes(x = day_name, y = solid_waste_kg, group = 1)) +
  geom_line(color = "dark orange") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(rows = vars(month_name)) +
  scale_shape_manual(values = c(16, 4)) +
  labs(
    x = "Date",
    y = "Monthly Solid Food Waste (kg)",
    title = "Monthly Solid Food Waste Trend"
  ) +
  theme(legend.position = "none")
monthly_solid_waste

# Monthly Plot on liquid food waste ----------------------------------------
# One facet row per month, day of month on the x axis, daily liquid food
# waste on the y axis; points are shaped by `is_closed`.
# Fix: the y-axis label read "Monthly Liquid ood Waste (kg)".
monthly_liquid_waste <-
  ggplot(data = df, aes(x = day_name, y = liquid_waste_kg, group = 1)) +
  geom_line(color = "blue") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(month_name ~ .) +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = "none") +
  xlab("Date") + ylab("Monthly Liquid Food Waste (kg)") +
  ggtitle("Monthly Liquid Food Waste Trend")
monthly_liquid_waste

# grid.arrange(monthly_loss_waste,monthly_loss, monthly_waste,
# monthly_solid_waste,monthly_liquid_waste)
Boxplots
# weekly boxplot on food loss + food waste ----------------------------
# Distribution of the daily total (loss + waste) per value of `day`, open
# days only. Outliers are drawn as stars (shape 8); the filled dot added by
# stat_summary() marks the group mean.
# Fix: the x-axis label read "Week of Day"; the axis shows the day of the week.
boxplot_week_loss_waste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = food_loss_kg + food_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of Food Loss and Food Waste in Day of the Week",
       x = "Day of Week", y = "Food Loss and Food Waste in kg")
boxplot_week_loss_waste

# weekly boxplot on food loss ----------------------------------
# Distribution of daily food loss per value of `day`, open days only.
# Outliers are drawn as stars (shape 8); the filled dot added by
# stat_summary() marks the group mean.
# Fix: the x-axis label read "Week of Day"; the axis shows the day of the week.
boxplot_week_food_loss <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = food_loss_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of Food Loss in Day of the Week",
       x = "Day of Week", y = "Food Loss in kg")
boxplot_week_food_loss

# weekly boxplot on food waste ------------------------------------
# Distribution of daily food waste per value of `day`, open days only.
# Outliers are drawn as stars (shape 8); the filled dot added by
# stat_summary() marks the group mean.
# Fix: the x-axis label read "Week of Day"; the axis shows the day of the week.
boxplot_week_food_waste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = food_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of All Food Waste in Day of the Week",
       x = "Day of Week", y = "Food Waste in kg")
boxplot_week_food_waste

# weekly boxplot on solid food waste ------------------------------------
# Distribution of daily solid food waste per value of `day`, open days only.
# Outliers are drawn as stars (shape 8); the filled dot added by
# stat_summary() marks the group mean.
# Fix: the x-axis label read "Week of Day"; the axis shows the day of the week.
boxplot_week_solidWaste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = solid_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of Solid Food Waste in Day of the Week",
       x = "Day of Week", y = "Solid Food Waste in kg")
boxplot_week_solidWaste

# weekly boxplot on liquid food waste ------------------------------------
# Distribution of daily liquid food waste per value of `day`, open days only.
# Outliers are drawn as stars (shape 8); the filled dot added by
# stat_summary() marks the group mean.
# Fix: the x-axis label read "Week of Day"; the axis shows the day of the week.
boxplot_week_liquidWaste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = liquid_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of Liquid Food Waste in Day of the Week",
       x = "Day of Week", y = "Liquid Food Waste in kg")
boxplot_week_liquidWaste

# Lay the four component day-of-week boxplots out on a single page.
# NOTE(review): boxplot_week_loss_waste is not included here, unlike the
# other combined panels in this file -- confirm that is intentional.
grid.arrange(
  grobs = list(
    boxplot_week_food_loss,
    boxplot_week_food_waste,
    boxplot_week_solidWaste,
    boxplot_week_liquidWaste
  )
)

# monthly boxplot on food loss + food waste ------------------------------------
# Distribution of the daily total (loss + waste) per month, open days only.
# Outliers are drawn as stars (shape 8); the filled dot is the monthly mean.
boxplot_month_loss_waste <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = month_name, y = food_loss_kg + food_waste_kg)
  ) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  xlab("Month") +
  ylab("Food Loss and Waste in kg") +
  ggtitle("Boxplot of Food Loss and Food Waste in Month")
boxplot_month_loss_waste

# monthly boxplot on food loss ------------------------------------
# Distribution of daily food loss per month, open days only. Outliers are
# drawn as stars (shape 8); the filled dot is the monthly mean.
# Fix: the y-axis label read "Food Waste in kg" although the plotted variable
# is food_loss_kg.
boxplot_month_loss <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = month_name, y = food_loss_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  labs(title = "Boxplot of Food Loss in Month",
       x = "Month", y = "Food Loss in kg")
boxplot_month_loss

# monthly boxplot on food waste ------------------------------------
# Distribution of daily food waste per month, open days only. Outliers are
# drawn as stars (shape 8); the filled dot is the monthly mean.
boxplot_month_waste <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = month_name, y = food_waste_kg)
  ) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  xlab("Month") +
  ylab("Food Waste in kg") +
  ggtitle("Boxplot of Daily Food Waste in Month")
boxplot_month_waste

# monthly boxplot on solid food waste ------------------------------------
# Distribution of daily solid food waste per month, open days only. Outliers
# are drawn as stars (shape 8); the filled dot is the monthly mean.
# Fix: the x-axis label read "Monthy".
boxplot_month_solidWaste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = month_name, y = solid_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  labs(title = "Boxplot of Daily Solid Food Waste in Month",
       x = "Month", y = "Solid Food Waste in kg")
boxplot_month_solidWaste

# monthly boxplot on liquid food waste ------------------------------------
# Distribution of daily liquid food waste per month, open days only. Outliers
# are drawn as stars (shape 8); the filled dot is the monthly mean.
boxplot_month_liquidWaste <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = month_name, y = liquid_waste_kg)
  ) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  xlab("Month") +
  ylab("Liquid Food Waste in kg") +
  ggtitle("Boxplot of Liquid Food Waste in Month")
boxplot_month_liquidWaste

# Lay the five monthly boxplots out on a single page.
grid.arrange(
  grobs = list(
    boxplot_month_loss_waste,
    boxplot_month_loss,
    boxplot_month_waste,
    boxplot_month_solidWaste,
    boxplot_month_liquidWaste
  )
)

Time Series Plots for Independents
## Time Series plots of:
# 1. weather conditions: temperature, humidity, precipitation
# 2. # orders + dine in + size + liquor + daily sales (confident)
# Time Series Plot on temperature ---------------------------------
# Daily temperature points with a loess smoother and a dotted horizontal
# reference line at 22 degrees C (the same value the gap-temperature plot
# subtracts; its meaning is not stated in this file).
# Fix: the reference line is passed as a constant `yintercept` instead of
# through aes(). Mapped through aes(), the layer inherits the plot data and
# draws one identical line per row of `df`.
tsPlot_temp <-
  ggplot(data = df, aes(x = as.Date(date), y = temp_c)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # geom_line(aes(group = 1), color="orange") +
  geom_hline(yintercept = 22, linetype = "dotted") +
  scale_x_date(date_labels = "%b %d") +
  xlab("Date") + ylab("Temperature in Degree Celsius") +
  ggtitle("Daily Average Hourly Temperature Plot")
tsPlot_temp
## `geom_smooth()` using formula = 'y ~ x'

# Time Series Plot on gap temperature with 22C---------------------------------
# Daily temperature minus 22 degrees C, as points with a loess smoother.
tsPlot_temp_gap <-
  ggplot(df, aes(x = as.Date(date), y = temp_c - 22)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # geom_line(color="green") +
  scale_x_date(date_labels = "%b %d") +
  labs(
    x = "Date",
    y = "Gap Temperature in Degree Celsius",
    title = "Daily Gap Temperature Plot"
  )
tsPlot_temp_gap
## `geom_smooth()` using formula = 'y ~ x'

# Time Series Plot on humidity ---------------------------------
# Daily humidity (humi_p) as points with a loess smoother.
tsPlot_humidity <-
  ggplot(df, aes(x = as.Date(date), y = humi_p)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # geom_line(color="red") +
  scale_x_date(date_labels = "%b %d") +
  labs(
    x = "Date",
    y = "Humidity in Percent",
    title = "Daily Humidity Plot"
  )
tsPlot_humidity
## `geom_smooth()` using formula = 'y ~ x'

# Time Series Plot on precipitation ---------------------------------
# Daily precipitation (prcp_mm) as points with a loess smoother.
tsPlot_precip <-
  ggplot(df, aes(x = as.Date(date), y = prcp_mm)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # geom_line(color="blue") +
  scale_x_date(date_labels = "%b %d") +
  labs(
    x = "Date",
    y = "Precipitation in millimetre",
    title = "Daily Precipitation Plot"
  )
tsPlot_precip
## `geom_smooth()` using formula = 'y ~ x'

# Lay the four weather plots out on a single page.
grid.arrange(
  grobs = list(
    tsPlot_temp,
    tsPlot_temp_gap,
    tsPlot_humidity,
    tsPlot_precip
  )
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## Time Series plots of:
# 1. # orders (full, half, takeouts)
# 2. daily dine in served (kg)
# 4. liquor
# 5. daily sales (confident)
# Time Series Plot on Meal Orders ---------------------------------
# Daily order counts on open days, one line per package type. Colour is mapped
# inside aes() so scale_color_manual() can build the "Packages" legend; the
# takeouts line is additionally dashed.
tsPlot_total_orders <-
  ggplot(df[df$is_closed %in% FALSE, ], aes(x = as.Date(date))) +
  geom_line(aes(y = fulls, color = "fulls")) +
  geom_line(aes(y = halfs, color = "halfs")) +
  geom_line(aes(y = takeouts, color = "takeouts"), linetype = "dashed") +
  scale_x_date(date_labels = "%b %d") +
  scale_color_manual(
    name = "Packages",
    breaks = c("fulls", "halfs", "takeouts"),
    values = c(
      "fulls" = "dark blue",
      "halfs" = "purple",
      "takeouts" = "dark red"
    )
  ) +
  labs(
    x = "Date",
    y = "Daily Number of Meal Orders",
    title = "Daily Different Package Meal Orders Plot"
  ) +
  theme(legend.position = "right")
tsPlot_total_orders

# Time Series Plot on demand and production ---------------------------------
# Daily total served versus FL_FP_kg on open days (FL_FP_kg is presumably the
# production series named in the title -- confirm).
# Fix: colour is now mapped inside aes(), as in tsPlot_total_orders. With the
# colours given as constants outside aes(), scale_color_manual() had nothing
# to act on and the plot had no legend telling the two lines apart.
tsPlot_D_S <-
  ggplot(data = subset(df, is_closed %in% FALSE), aes(x = as.Date(date))) +
  geom_line(aes(y = daily_total_served, color = "daily_total_served")) +
  geom_line(aes(y = FL_FP_kg, color = "FL_FP_kg"), linetype = "dashed") +
  scale_x_date(date_labels = "%b %d") +
  xlab("Date") + ylab("Daily Quantity in kg") +
  ggtitle("Daily Total Served and Production Plot") +
  scale_color_manual(name = "Served",
                     breaks = c("daily_total_served", "FL_FP_kg"),
                     values = c("daily_total_served" = "dark blue",
                                "FL_FP_kg" = "dark red")) +
  theme(legend.position = "right")
tsPlot_D_S

# Daily difference between total served and FL_FP_kg on open days, drawn as a
# line with a loess smoother on top.
tsPlot_diff_D_S <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = as.Date(date), y = daily_total_served - FL_FP_kg)
  ) +
  geom_line(color = "black") +
  stat_smooth(
    method = "loess",
    color = "light green", fill = "light green"
  ) +
  scale_x_date(date_labels = "%b %d") +
  labs(
    x = "Date",
    y = "Daily Inventory in kg",
    title = "Difference Between Total Served and Production Plot"
  )
tsPlot_diff_D_S
## `geom_smooth()` using formula = 'y ~ x'

# Time Series Plot on daily sales ---------------------------------
# Daily sales on open days, drawn as a line with a loess smoother on top.
tsPlot_sales <-
  ggplot(
    data = df[df$is_closed %in% FALSE, ],
    mapping = aes(x = as.Date(date), y = sales)
  ) +
  geom_line(color = "purple") +
  stat_smooth(
    method = "loess",
    color = "light green", fill = "light green"
  ) +
  scale_x_date(date_labels = "%b %d") +
  labs(
    x = "Date",
    y = "Daily Sales in dollar",
    title = "Daily Sales Plot"
  )
tsPlot_sales
## `geom_smooth()` using formula = 'y ~ x'

# Lay the four order / served / sales plots out on a single page.
grid.arrange(
  grobs = list(
    tsPlot_total_orders,
    tsPlot_D_S,
    tsPlot_diff_D_S,
    tsPlot_sales
  )
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Drop every object created in this section, keeping only `df` and `AdjMat`.
rm(list = setdiff(ls(), c("df", "AdjMat")))